# Importing libraries
from __future__ import division
from IPython.display import Image
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from pandas import read_csv
import seaborn as sb
from textwrap import wrap
import statsmodels.api as sm
from datetime import datetime, timedelta,date
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import KFold, cross_val_score, train_test_split
import seaborn as sns
from sklearn.cluster import KMeans
import plotly as py
import plotly.express as px
import plotly.offline as pyoff
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import xgboost as xgb
import plotly.graph_objs as go
import plotly.offline as py
# Turn off pandas' chained-assignment warning (its default setting is 'warn').
pd.set_option('mode.chained_assignment', None)

# Read the raw dataset; the file is decoded as ISO-8859-1.
filename = 'customer_segmentation.csv'
df = pd.read_csv(filename, encoding="ISO-8859-1")

# Peek at the first three rows and count the missing values per column.
# (As bare expressions, these only render output in an interactive session.)
df.head(3)
df.isnull().sum()

# Discard every row that has at least one missing value, then re-count the
# missing values and list the column dtypes.
df = df.dropna(axis=0, how='any')
df.isnull().sum()
df.dtypes
# Parse the InvoiceDate column into pandas datetime values
# (it is presumably read from the CSV as plain strings).
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# Derive a single integer key of the form YYYYMM (year * 100 + month) for
# each invoice, to make reporting and visualization easier.
df['InvoiceYearMonth'] = df['InvoiceDate'].map(
    lambda stamp: stamp.year * 100 + stamp.month
)
df.dtypes
# Histogram of rows per Country, to see which country places the most orders.
fig = px.histogram(data_frame=df, x="Country")

# Slant the x-axis tick labels by 45 degrees, then render the figure.
fig.update_layout(
    xaxis=go.layout.XAxis(tickangle=45),
)
fig.show()